Purpose

This markdown follows differential expression analyses from deseq2_analyses.Rmd. DEGs from P70 (39) pre-cystic data are used as input for signatureSearch. Results are annotated with Drug Repurposing Hub and onSides data.
Data Sets - ’39 / Pre-Cystic P70: GSE149739
- ’19 / Cystic P21: GSE134719
- ’56 / Cystic P28: GSE69556

Load libraries

library(signatureSearch)
library(ggplot2)
library(ggalluvial)
#library(readr)
library(signatureSearch)
library(ExperimentHub); library(rhdf5)
library(here)
library(dplyr)
library(stringr)
source(here("code", "functions.R"))

Pull LINCS data from Experiment Hub

eh <- ExperimentHub()
cmap <- eh[["EH3223"]]; cmap_expr <- eh[["EH3224"]] #cmap data locations in experimenthub
lincs <- eh[["EH3226"]]; lincs_expr <- eh[["EH3227"]] #lincs data locations in experimenthub
h5ls(lincs) #reading hdf5 file
##   group     name       otype dclass           dim
## 0     /    assay H5I_DATASET  FLOAT 12328 x 45956
## 1     / colnames H5I_DATASET STRING     45956 x 1
## 2     / rownames H5I_DATASET STRING     12328 x 1
db_path <- system.file("extdata", "sample_db.h5", package = "signatureSearch")

Pre-cystic (P70) Signature Reversion

Load in top up and down DEGs

hom_p70_UP <- read.csv(here("res", "deseq2_outputs", "p70_signature_UP.csv"))
hom_p70_DOWN <- read.csv(here("res", "deseq2_outputs", "p70_signature_DOWN.csv"))

Run signatureSearch, filter for kidney cell lines

gess_kidneycells_p70 <- ssRun(upgenes = hom_p70_UP$entrezgene_id, downgenes = hom_p70_DOWN$entrezgene_id, cells = "kidney")
## 100 / 100 genes in up set share identifiers with reference database
## 31 / 31 genes in down set share identifiers with reference database

Total results (inversely related)

dim(gess_kidneycells_p70)
## [1] 730  16

Top 30 Results by NCS

head(gess_kidneycells_p70, 30)
## # A tibble: 30 × 16
##    pert       cell  type  trend   WTCS WTCS_…¹ WTCS_…²   NCS   Tau TauRe…³ NCSct
##    <chr>      <chr> <chr> <chr>  <dbl>   <dbl>   <dbl> <dbl> <dbl>   <int> <dbl>
##  1 etoposide  HA1E  trt_… down  -0.713 1.25e-5 1.25e-4 -2.03 -99.6    2241 -1.78
##  2 SA-84869   NKDBA trt_… down  -0.699 1.31e-5 1.25e-4 -1.69  NA       229 -1.40
##  3 pyrvinium… HA1E  trt_… down  -0.663 1.47e-5 1.25e-4 -1.88 -99.6    2241 -1.69
##  4 BRD-K9231… HA1E  trt_… down  -0.663 1.47e-5 1.25e-4 -1.88 -99.6    2241 -1.38
##  5 SA-1919710 NKDBA trt_… down  -0.662 1.47e-5 1.25e-4 -1.60  NA       229 -1.51
##  6 BRD-K9519… HA1E  trt_… down  -0.646 1.54e-5 1.25e-4 -1.84 -99.0    2241 -1.52
##  7 amsacrine  HA1E  trt_… down  -0.639 1.57e-5 1.25e-4 -1.81 -98.8    2241 -1.59
##  8 BRD-K6087… HA1E  trt_… down  -0.634 1.59e-5 1.25e-4 -1.80 -98.8    2241 -1.61
##  9 LY-294002  NKDBA trt_… down  -0.630 1.61e-5 1.25e-4 -1.52  NA       229 -1.07
## 10 thapsigar… HA1E  trt_… down  -0.628 1.62e-5 1.25e-4 -1.79 -98.4    2241 -1.56
## # … with 20 more rows, 5 more variables: N_upset <int>, N_downset <int>,
## #   t_gn_sym <chr>, MOAss <chr>, PCIDss <chr>, and abbreviated variable names
## #   ¹​WTCS_Pval, ²​WTCS_FDR, ³​TauRefSize
write.csv(gess_kidneycells_p70, file = here("res", "sigsearch_outputs", "gess_kidneycells_p70.csv"))

Repurposing Hub Annotations

LINCS Drug Repurposing Hub, contains drug names, MOAs, original indications, and targets

repurpHub <- read.delim(here("data", "Repurposing_Hub_export.txt"))

rHub_p70 <- dplyr::inner_join(gess_kidneycells_p70, repurpHub, by= c("pert" = "Name"))

P70/’39 Already-Launched Drugs

rhub_launched_p70 <- dplyr::filter(rHub_p70, Phase == "Launched")
write.csv(rhub_launched_p70, file = here("res", "sigsearch_outputs", "rhub_launcheddrugs_p70.csv"))

Alluvial plot of drugs-targets with just launched drugs

rhub_launched_p70_top30 <- dplyr::slice_min(rhub_launched_p70, order_by = Tau, n = 30)

rhub_launched_p70_top_septarget <- tidyr::separate_rows(rhub_launched_p70_top30, Target, sep = ", ")
#remove drugs with empty 'target' field
rhub_launched_p70_top_septarget <- dplyr::filter(rhub_launched_p70_top_septarget, Target != "")

plot

target_alluvial <- ggplot(data = rhub_launched_p70_top_septarget,
       aes(axis1 = pert, axis2 = Target)) +
  geom_alluvium(color = "black", aes(fill = WTCS)) + # , colour = trend)) + 
  geom_stratum(width = 3/12, color = "black") + #the border of the stratum
  scale_x_discrete(expand = c(.07, .07)) + 
  scale_y_discrete(expand = c(.007, .007)) + 
  scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
  geom_text(stat = "stratum", size = 3.5, fontface = "bold", aes(label = after_stat(stratum))) +
  labs(title = "Targets of Top 20 Launched Drugs") +
  theme_void() 


target_alluvial
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.

MOA’s separated, string wrap labels

rhub_launched_p70_top_moasep <- tidyr::separate_rows(rhub_launched_p70_top30, MOA, sep = ",")

rhub_launched_p70_top_moasep$MOA <- stringr::str_wrap(rhub_launched_p70_top_moasep$MOA, width = 20)

rhub_launched_p70_top_moasep$Disease.Area <- stringr::str_wrap(rhub_launched_p70_top_moasep$Disease.Area, width = 30)

plot

moa_alluvial <- ggplot(data = rhub_launched_p70_top_moasep,
       aes(axis1 = pert, axis2 = MOA)) +
  geom_alluvium(color = "black", aes(fill = WTCS)) + # , colour = trend)) + 
  geom_stratum(width = 3/12, color = "black") + #the border of the stratum
  scale_x_discrete(expand = c(.07, .07)) + 
  scale_y_discrete(expand = c(.007, .007)) + 
  scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
  geom_text(stat = "stratum", size = 3.5, fontface = "bold", aes(label = after_stat(stratum))) +
  labs(title = "MOA of Top 30 Launched Drugs") +
  theme_void() 


moa_alluvial

ggsave(here("res", "sigsearch_outputs", "moa_alluvial_wtcs_220819.pdf"), moa_alluvial, width = 10, height = 15)

P70/’39 Already-Launched Drugs

rhub_launched_p70 <- dplyr::filter(rHub_p70, Phase == "Launched")

Save

write.csv(rHub_p70, file = here("res", "sigsearch_outputs", "ssresults_rHub_p70.csv"))

FDA approved drugs

Code adapted from Jen Fisher
Data: database was downloaded in August 2022 from https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files (Accessed date: August 2022). From the information about drugs (i.e, marketing status, application information, etc.), we created a table with information including the method of drug delivery, dosage, active ingredients, and FDA approval (FDA_products_status_220816.csv) FDA terminology https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-glossary-terms#:~:text=Tentative%20Approval&text=FDA%20delays%20final%20approval%20of,market%20the%20generic%20drug%20product. Drugs were filtered for unique names, regardless of form (i.e. injectible, solution, tablet, etc) and further filtered for Marketing Statuses that were not Discontinued (leaving Prescription, Over-the-counter, and Tentative Approval, due to patents)

fda_marketing_status <- read.delim(here("data/fda_data", "MarketingStatus.txt"))
frda_product_info <- read.delim(here("data/fda_data","Products.txt"))

fda_marketing_status$ID<- paste(fda_marketing_status$ApplNo, fda_marketing_status$ProductNo, sep= "_")

frda_product_info$ID<- paste(frda_product_info$ApplNo, frda_product_info$ProductNo, sep= "_")

Add marketing status description to marketing status data

marketing_lookup <- read.delim(here("data/fda_data", "MarketingStatus_Lookup.txt"))
fda_marketing_status <- merge(fda_marketing_status, marketing_lookup, by = "MarketingStatusID")

Combine drug info and status

fda_data <- merge(frda_product_info, fda_marketing_status, by = "ID")
write.csv(fda_data, file = here("data", "FDA_products_status_220816.csv"))
fda_data <- read.csv(here("data", "FDA_products_status_220816.csv"), row.names = 1)
#only unique drugs, regardless of concentration or form
fda_approved <- dplyr::filter(fda_data, MarketingStatusDescription != "Discontinued")
fda_approved <- dplyr::distinct(fda_approved, ActiveIngredient, .keep_all = TRUE)
write.csv(fda_approved, file = here("data", "FDA_approved_unique_ingredients.csv"))

Repurposing hub ‘launched’ drugs

rhub_launched_p70 <- read.csv(here("res", "sigsearch_outputs", "rhub_launcheddrugs_p70.csv"), row.names = 1)
fda_approved <- read.csv(here("data", "FDA_approved_unique_ingredients.csv"), row.names = 1)

Test for FDA approval

fda_approved_p70 <- FDA_APPROVAL_CHECK(rhub_launched_p70$pert, fda_approved)
fda_approved_p70 <- mutate(rhub_launched_p70, fda_approval = fda_approved_p70)
fda_approved_p70 <- dplyr::filter(fda_approved_p70, fda_approval == TRUE)
#results for both kidney cell lines for some drugs
fda_approved_p70 <- dplyr::distinct(fda_approved_p70, pert, .keep_all = TRUE)

save FDA-approved drug candidates

saveRDS(fda_approved_p70, file = here("res", "sigsearch_outputs", "p70_fdaapproved_drugs.rds"))
write.csv(fda_approved_p70, file = here("res", "sigsearch_outputs", "p70_fdaapproved_drugs.csv"))

MOA’s separated, string wrap labels

fda_approved_p70_moa <- tidyr::separate_rows(fda_approved_p70, MOA, sep = ",")

fda_approved_p70_moa_top <- fda_approved_p70_moa %>% dplyr::slice_min(order_by = NCS, n = 42) %>% mutate(pert = forcats::fct_reorder(pert, NCS))

fda_approved_p70_moa_top$MOA <- stringr::str_trunc(fda_approved_p70_moa_top$MOA, width = 38)

#####MOA plot

moa_alluvial <- ggplot(fda_approved_p70_moa_top,
       aes(axis1 = pert, axis2 = MOA)) +
  geom_alluvium(color = "black", aes(fill = NCS)) + # , colour = trend)) + 
  geom_stratum(width = 5/16, color = "black") + #the border of the stratum
  scale_y_discrete(limits = ) + 
  scale_fill_viridis_c(aesthetics = c("colour", "fill"), direction = -1) +
  geom_text(stat = "stratum", size = 3.25, fontface = "bold", aes(label = after_stat(stratum))) +
  #labs(title = "MOA of Top FDA-Approved Drugs")  + 
  theme(axis.title.x = element_blank(), plot.title = element_text(face = "bold", hjust = 0.5), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) + ggtitle(label = "MOA of Top FDA-Approved Drugs") + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank() )

moa_alluvial

ggsave(here("res", "sigsearch_outputs", "moa_fda_alluvial.png"), moa_alluvial, width = 12, height = 7, dpi = 600)

Frequency of MOA’s

moa_freq <- as.data.frame(table(fda_approved_p70_moa$MOA))
other <- moa_freq$Var1[moa_freq$Freq==1]
#other <- cbind(Var1 = as.character(other), Freq = rep("other", length(other)))
multi_moa <- dplyr::filter(moa_freq, Freq > 1)
multi_moa$Var1 <- as.character(multi_moa$Var1)
moa_freqs <- rbind(multi_moa, c(Var1 = "other", Freq = length(other)))
moa_freqs$Freq <- as.numeric(moa_freqs$Freq)
#moa_freq$Var1 <- stringr::str_replace(moa_freq$Var1 %in% other == "other")

#moa_freq <- dplyr::filter(moa_freq, Freq > 1)
# Stacked + percent
moa_freqs %>% mutate(MOA = forcats::fct_reorder(Var1, Freq)) %>%
ggplot(aes(fill=MOA, y=Freq, x="MOA")) + 
    geom_bar(position="fill", stat="identity") + 
  scale_fill_viridis_d(aesthetics = c("colour", "fill")) 

#visualize
moa_freq <- dplyr::filter(moa_freq, Freq > 2)
moa_freq$Var1 <- str_wrap(moa_freq$Var1, width = 25)
moa_frq <- moa_freq %>% mutate(MOA = forcats::fct_reorder(Var1, desc(Freq))) %>% ggplot(aes(x = MOA, y = Freq, fill = Freq)) +  
  geom_bar(stat = "identity") + 
  scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
  theme(axis.text.x = element_text(size = 13.5, face = "bold", angle = 50, vjust = 0.97, hjust=0.95), axis.title.x = element_blank(), plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) + ggtitle(label = "Most Frequent Mechanisms of Action") + 
  theme(plot.margin = unit(c(.2,.2,.2,1.5), "cm"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black")) 
  

moa_frq

ggsave(filename = here("res/sigsearch_outputs", "p70_fdaapproved_moa_freq.pdf"), moa_frq, height = 5.5, width = 9.5)

Frequency of original disease area indications

indication_freq <- as.data.frame(table(fda_approved_p70_moa$Disease.Area))
indication_freq <- dplyr::filter(indication_freq, Freq > 2)

plot

#visualize
indication_plot <- indication_freq %>% mutate(indication = forcats::fct_reorder(Var1, desc(Freq))) %>% ggplot(aes(x = indication, y = Freq, fill = Freq)) +  
  geom_bar(stat = "identity") + 
  scale_fill_viridis_c(aesthetics = c("colour", "fill")) +
   theme(axis.text.x = element_text(size = 13.5, face = "bold", angle = 50, vjust = 0.92, hjust=0.85), axis.title.x = element_blank(), plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) + ggtitle(label = "Most Frequent Disease Area Indications") + 
  theme(plot.margin = unit(c(.2,.2,.2,1), "cm"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))


indication_plot

ggsave(filename =here("res/sigsearch_outputs", "p70_fdaapproved_origindication_freq.pdf"), indication_plot, height = 5.5, width = 8)

Versions

R.Version()
## $platform
## [1] "x86_64-apple-darwin17.0"
## 
## $arch
## [1] "x86_64"
## 
## $os
## [1] "darwin17.0"
## 
## $system
## [1] "x86_64, darwin17.0"
## 
## $status
## [1] ""
## 
## $major
## [1] "4"
## 
## $minor
## [1] "2.0"
## 
## $year
## [1] "2022"
## 
## $month
## [1] "04"
## 
## $day
## [1] "22"
## 
## $`svn rev`
## [1] "82229"
## 
## $language
## [1] "R"
## 
## $version.string
## [1] "R version 4.2.0 (2022-04-22)"
## 
## $nickname
## [1] "Vigorous Calisthenics"
installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##  signatureSearchData              stringr                dplyr 
##             "1.10.0"              "1.4.1"             "1.0.10" 
##                 here                rhdf5        ExperimentHub 
##              "1.0.1"             "2.40.0"              "2.4.0" 
##        AnnotationHub        BiocFileCache               dbplyr 
##              "3.4.0"              "2.4.0"              "2.2.1" 
##           ggalluvial              ggplot2      signatureSearch 
##             "0.12.3"              "3.4.0"             "1.10.0" 
## SummarizedExperiment              Biobase        GenomicRanges 
##             "1.26.1"             "2.56.0"             "1.48.0" 
##         GenomeInfoDb              IRanges            S4Vectors 
##             "1.32.4"             "2.30.1"             "0.34.0" 
##         BiocGenerics       MatrixGenerics          matrixStats 
##             "0.42.0"              "1.8.1"             "0.62.0" 
##                 Rcpp 
##              "1.0.9"